Ejercicio Regresión armónica dinámica

Eddie Aguilar Ernesto Morales

library("easypackages")
Warning: package 'easypackages' was built under R version 4.2.3
packages("tidyverse","fpp3", "tsibble", "feasts","fable", "patchwork")
library(plotly)
vic_elec
# A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
   Time                Demand Temperature Date       Holiday
   <dttm>               <dbl>       <dbl> <date>     <lgl>  
 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE   
 7 2012-01-01 03:00:00  3694.        20.1 2012-01-01 TRUE   
 8 2012-01-01 03:30:00  3562.        19.6 2012-01-01 TRUE   
 9 2012-01-01 04:00:00  3433.        19.1 2012-01-01 TRUE   
10 2012-01-01 04:30:00  3359.        19.0 2012-01-01 TRUE   
# … with 52,598 more rows

Usando datos cada hora:

(vic_elec_hour <- index_by(vic_elec, time = ~ floor_date(., unit="hour")) |> 
  summarise(demand = sum(Demand),
         mean_temp = mean(Temperature),
         max_temp = max(Temperature),
         holiday = any(Holiday)) |> 
    mutate(
      day_type = case_when(
        holiday ~ "holiday",
        wday(time) %in% 2:6 ~ "weekday",
        TRUE ~ "weekend"
      )))
# A tsibble: 26,304 x 6 [1h] <Australia/Melbourne>
   time                demand mean_temp max_temp holiday day_type
   <dttm>               <dbl>     <dbl>    <dbl> <lgl>   <chr>   
 1 2012-01-01 00:00:00  8646.      21.2     21.4 TRUE    holiday 
 2 2012-01-01 01:00:00  7927.      20.6     20.7 TRUE    holiday 
 3 2012-01-01 02:00:00  7902.      20.3     20.4 TRUE    holiday 
 4 2012-01-01 03:00:00  7256.      19.8     20.1 TRUE    holiday 
 5 2012-01-01 04:00:00  6793.      19.0     19.1 TRUE    holiday 
 6 2012-01-01 05:00:00  6636.      18.7     18.8 TRUE    holiday 
 7 2012-01-01 06:00:00  6548.      18.7     18.8 TRUE    holiday 
 8 2012-01-01 07:00:00  6865.      19.6     20.1 TRUE    holiday 
 9 2012-01-01 08:00:00  7300.      21.8     22.6 TRUE    holiday 
10 2012-01-01 09:00:00  8002.      24.6     25.2 TRUE    holiday 
# … with 26,294 more rows
p <- autoplot(vic_elec_hour, demand, color = "blue")

ggplotly(p, dynamicTicks = TRUE) |> rangeslider()
vic_elec_hour |> 
  ggplot(aes(x = mean_temp, y = demand, 
             color = holiday)) +
  geom_point(alpha = 0.5)

vic_elec_hour |> 
  ggplot(aes(x = mean_temp, y = demand, 
             color = day_type)) +
  geom_point(alpha = 0.5)

(train <- filter(vic_elec_hour, year(time) %in% 2012:2013))
# A tsibble: 17,544 x 6 [1h] <Australia/Melbourne>
   time                demand mean_temp max_temp holiday day_type
   <dttm>               <dbl>     <dbl>    <dbl> <lgl>   <chr>   
 1 2012-01-01 00:00:00  8646.      21.2     21.4 TRUE    holiday 
 2 2012-01-01 01:00:00  7927.      20.6     20.7 TRUE    holiday 
 3 2012-01-01 02:00:00  7902.      20.3     20.4 TRUE    holiday 
 4 2012-01-01 03:00:00  7256.      19.8     20.1 TRUE    holiday 
 5 2012-01-01 04:00:00  6793.      19.0     19.1 TRUE    holiday 
 6 2012-01-01 05:00:00  6636.      18.7     18.8 TRUE    holiday 
 7 2012-01-01 06:00:00  6548.      18.7     18.8 TRUE    holiday 
 8 2012-01-01 07:00:00  6865.      19.6     20.1 TRUE    holiday 
 9 2012-01-01 08:00:00  7300.      21.8     22.6 TRUE    holiday 
10 2012-01-01 09:00:00  8002.      24.6     25.2 TRUE    holiday 
# … with 17,534 more rows
# maximos:
# year = 4380
# week = 84
# day = 12

fit <- train |>  
  model(
    Fourier = TSLM(log(demand) ~ fourier(period = "year", K = 438) + 
                      fourier(period = "week", K = 42) + 
                      fourier(period = "day", K = 6)))
glance(fit)
# A tibble: 1 × 15
  .model  r_squa…¹ adj_r…²  sigma2 stati…³ p_value    df log_lik     AIC    AICc
  <chr>      <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <int>   <dbl>   <dbl>   <dbl>
1 Fourier    0.876   0.869 0.00444    122.       0   961  23128. -94120. -94008.
# … with 5 more variables: BIC <dbl>, CV <dbl>, deviance <dbl>,
#   df.residual <int>, rank <int>, and abbreviated variable names ¹​r_squared,
#   ²​adj_r_squared, ³​statistic
fc <- forecast(fit, h = "1 year")
Warning: prediction from a rank-deficient fit may be misleading
fc
# A fable: 8,766 x 4 [1h] <Australia/Melbourne>
# Key:     .model [1]
   .model  time                           demand  .mean
   <chr>   <dttm>                         <dist>  <dbl>
 1 Fourier 2014-01-01 00:00:00   t(N(9, 0.0047))  8399.
 2 Fourier 2014-01-01 01:00:00   t(N(9, 0.0047))  8329.
 3 Fourier 2014-01-01 02:00:00   t(N(9, 0.0047))  8063.
 4 Fourier 2014-01-01 03:00:00 t(N(8.9, 0.0047))  7697.
 5 Fourier 2014-01-01 04:00:00 t(N(8.9, 0.0047))  7484.
 6 Fourier 2014-01-01 05:00:00   t(N(9, 0.0047))  7727.
 7 Fourier 2014-01-01 06:00:00 t(N(9.1, 0.0047))  8572.
 8 Fourier 2014-01-01 07:00:00 t(N(9.2, 0.0047))  9740.
 9 Fourier 2014-01-01 08:00:00 t(N(9.3, 0.0047)) 10537.
10 Fourier 2014-01-01 09:00:00 t(N(9.3, 0.0047)) 10569.
# … with 8,756 more rows
ggplot() +
  geom_line(vic_elec_hour, mapping = aes(x = time, y = demand)) + 
  geom_line(fc, mapping = aes(x = time, y = .mean), color = "blue", alpha = 0.8)